# -*- coding: utf-8 -*-
import copy
from scrapy import Request
from scrapy.exceptions import IgnoreRequest
from zc_core.dao.item_data_dao import ItemDataDao
from zc_core.util.batch_gen import time_to_batch_no
from zc_core.util.http_util import retry_request
from zc_core.dao.batch_dao import BatchDao
from zc_core.util.done_filter import DoneFilter
from zc_core.spiders.base import BaseSpider
from cnncmall.rules import *


class PriceSpider(BaseSpider):
    """Spider that queries current prices for items of a batch.

    It loads the items of ``self.batch_no`` whose ``salePrice`` is ``None``
    and whose ``catalog1Id`` is in the ``CATALOG_WHITE_LIST`` setting, groups
    their ids by supplier, and POSTs them to ``price_url`` in chunks of
    ``self.batch_size`` ids.
    """
    name = "price"
    custom_settings = {
        'CONCURRENT_REQUESTS': 12,
        # 'DOWNLOAD_DELAY': 0.1,
        'CONCURRENT_REQUESTS_PER_DOMAIN': 12,
        'CONCURRENT_REQUESTS_PER_IP': 12,
    }
    # Price query endpoint (requests are sent as POST with a JSON body).
    price_url = "https://www.cnncmall.com/pesapp/mall/noauth/queryCurrentPrice"

    def __init__(self, batchNo=None, *args, **kwargs):
        """Initialise the spider for one batch.

        :param batchNo: batch number forwarded to ``BaseSpider``; ``self.batch_no``
            is read afterwards, so it is presumably set by the base class
            (TODO confirm against ``BaseSpider``).
        """
        super(PriceSpider, self).__init__(batchNo=batchNo, *args, **kwargs)
        # Create the batch record.
        BatchDao().create_batch(self.batch_no)
        # Filter meant to avoid collecting the same item twice; it is not
        # referenced elsewhere in this class (presumably used by shared
        # components - verify).
        self.done_filter = DoneFilter(self.batch_no)
        # Number of sku ids sent per price request.
        self.batch_size = 1

    def start_requests(self):
        """Yield one price request per chunk of sku ids, grouped by supplier.

        A request is emitted as soon as a supplier has accumulated
        ``self.batch_size`` ids; whatever is left over per supplier is
        flushed once all items have been seen.
        """
        settings = get_project_settings()
        cat_filter = settings.get('CATALOG_WHITE_LIST', [])
        # NOTE(review): with an empty white list the ``$in`` filter below
        # presumably matches nothing, so no request would be made - confirm
        # this is intended.
        sku_list = ItemDataDao().get_batch_data_list(
            self.batch_no,
            fields={'_id': 1, 'supplierId': 1},
            query={'salePrice': None, 'catalog1Id': {'$in': cat_filter}}
        )
        self.logger.info('目标：%s', len(sku_list))

        # supplier id -> sku ids collected so far and not yet requested
        pending = {}
        for sku in sku_list:
            sp_id = sku.get("supplierId")
            rows = pending.setdefault(sp_id, [])
            rows.append(sku.get("_id"))

            if len(rows) >= self.batch_size:
                # Hand the full list over to the request and start a fresh
                # one in the same slot, so the request never shares a list
                # that is still being filled (no copy needed).
                pending[sp_id] = []
                yield self._build_req(rows, sp_id)

        # Flush the incomplete chunks that never reached ``batch_size``.
        for sp_id, rows in pending.items():
            if rows:
                yield self._build_req(rows, sp_id)

    def _build_req(self, sku_ids, sp_id):
        """Build the POST request asking for the prices of ``sku_ids``.

        :param sku_ids: list of sku ids, sent in the JSON body and also kept
            in ``meta`` so the callback can compare request and response sizes.
        :param sp_id: supplier id, sent as ``supplierShopId`` and kept in ``meta``.
        :return: a ``scrapy.Request`` handled by :meth:`parse_price_list`.
        """
        return Request(
            method='POST',
            url=self.price_url,
            headers={
                'Host': 'www.cnncmall.com',
                'Connection': 'keep-alive',
                'Accept': 'application/json, text/plain, */*',
                'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.150 Safari/537.36',
                # NOTE(review): fixed placeholder value; presumably not
                # validated by this "noauth" endpoint - confirm.
                'auth-token': '123',
                'Content-Type': 'application/json;charset=UTF-8',
                'Origin': 'https://www.cnncmall.com',
                'Sec-Fetch-Site': 'same-origin',
                'Sec-Fetch-Mode': 'cors',
                'Sec-Fetch-Dest': 'empty',
                'Accept-Encoding': 'gzip, deflate, br',
                'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8,ja;q=0.7',
            },
            meta={
                'reqType': 'price',
                'batchNo': self.batch_no,
                "skuIds": sku_ids,
                "supplierId": sp_id,
            },
            body=json.dumps({
                "skuIds": sku_ids,
                "supplierShopId": sp_id,
                "companyId": "",
                "isprofess": ""
            }),
            callback=self.parse_price_list,
            errback=self.error_back
        )

    def parse_price_list(self, response):
        """Parse a price response and yield the parsed items wrapped in a ``Box``.

        Logs whether the number of parsed items matches the number of
        requested sku ids, and logs an error when nothing was parsed.
        """
        meta = response.meta
        sku_ids = meta.get("skuIds")
        sp_id = meta.get("supplierId")

        # Inside the method body this name resolves to the module-level
        # ``parse_price_list`` function, not to this method.
        item_list = parse_price_list(response)
        if not item_list:
            self.logger.error('空集: skus=%s, sp_id=%s', sku_ids, sp_id)
            return

        if len(sku_ids) == len(item_list):
            fmt = '价格: sp=%s, src=%s, dist=%s'
        else:
            # Requested and returned counts differ.
            fmt = '价格[差异]: sp=%s, src=%s, dist=%s'
        self.logger.info(fmt, sp_id, len(sku_ids), len(item_list))
        yield Box('item', self.batch_no, item_list)


